#!/usr/bin/python
# -*- coding: UTF-8 -*-
import requests
import os
import time
from requests import HTTPError

from spider.requests_helper import Request
from lxml import html
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium import webdriver


def spider(page):
    """Return the audio source URL found on one page of the site.

    Loads ``http://www.ixinmo.com/shu/1786/{page}.html`` through the
    module-level Selenium ``driver`` and extracts the first ``src`` attribute
    of ``//div[@class="player"]/audio/source`` from the rendered HTML.

    Args:
        page: Value substituted into the page URL (the script passes the
            episode number).

    Returns:
        The first matching ``src`` attribute value.

    Raises:
        selenium.common.exceptions.WebDriverException: If the page still
            cannot be loaded on the final attempt.
        IndexError: If the page contains no matching ``<source>`` element.
    """
    # Imported locally so this function does not depend on a new file-level
    # import; selenium itself is already a dependency of this file.
    from selenium.common.exceptions import WebDriverException

    url = "http://www.ixinmo.com/shu/1786/{page}.html".format(page=page)

    retry_count = 5
    html_text = ''
    # Run exactly ``retry_count`` attempts (the previous range(1, retry_count)
    # silently performed one attempt fewer).
    for attempt in range(1, retry_count + 1):
        try:
            driver.get(url)
            html_text = driver.page_source
        except (HTTPError, WebDriverException):
            # Browser-side failures are reported as WebDriverException, so it
            # is caught here in addition to the original HTTPError.
            if attempt == retry_count:
                raise
            print("获得html内容发送错误进行重试")
            continue
        if html_text != '':
            break

    # Parse the rendered page and collect every audio source URL.
    selector = html.fromstring(html_text)
    source_list = selector.xpath('//div[@class="player"]/audio/source/@src')

    if not source_list:
        # Keep the IndexError type the bare ``source_list[0]`` used to raise,
        # but say which page was affected.
        raise IndexError("no audio source found on page: {url}".format(url=url))

    return source_list[0]


def download_file(url, path):
    """Download ``url`` and write the response body to ``path``.

    The parent directory of ``path`` is created first, including any missing
    intermediate directories.

    Args:
        url: Address of the file to download.
        path: Destination file path.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never saved as if it were the requested file.
        requests.RequestException: On connection problems or timeout.
    """
    # A timeout keeps a stalled connection from blocking the script forever.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    # os.path.dirname copes with paths that have no separator and with
    # platform-specific separators; makedirs creates nested folders that a
    # single os.mkdir call could not.
    folder = os.path.dirname(path)
    if folder:
        os.makedirs(folder, exist_ok=True)

    with open(path, 'wb') as f:
        f.write(response.content)


if __name__ == '__main__':
    # Alternative setup with an explicit chromedriver location:
    # chrome_driver = "D:/CareerSoftware/chromedriver/chromedriver.exe"
    # driver = webdriver.Chrome(executable_path=chrome_driver)

    # Adds performance logging preferences to the shared CHROME capabilities
    # dict. NOTE(review): whether this reaches the driver created below
    # depends on the installed Selenium version - confirm before removing.
    d = DesiredCapabilities.CHROME
    d['loggingPrefs'] = {'performance': 'ALL'}
    # Configure the Chrome browser options.
    options = webdriver.ChromeOptions()
    # options.add_argument('lang=zh_CN.UTF-8')
    # Mute audio so that pages playing media stay silent.
    options.add_argument('--mute-audio')
    # NOTE(review): newer Selenium releases expect ``options=`` instead of
    # ``chrome_options=`` - confirm against the installed version.
    driver = webdriver.Chrome(chrome_options=options)

    # Destination template: one file per episode, keeping the source suffix.
    path = 'G:/BaiduNetdiskDownload/老子是癞蛤蟆/{page}.{suffix}'
    total = 420
    start_page = 1
    start_index = 50
    end_index = total + 1
    try:
        for i in range(start_index, end_index):
            # Wait ten minutes before each episode (presumably to throttle
            # requests to the site - confirm).
            time.sleep(10 * 60)
            src = str(spider(i))
            # Everything after the last '.' of the URL is used as the suffix.
            suffix = src[src.rfind('.')+1:]
            download_file(src, path.format(page=i, suffix=suffix))
            print("下载{page}集成功".format(page=i))
    finally:
        # Always shut the browser down, even if a download fails, so the
        # Chrome/chromedriver processes are not left running.
        driver.quit()
